Loading the libraries that are needed to load the dataset and perform queries on it.
library(DataComputing)
library(tidyverse)
library(rvest)
library(lubridate)
library(dplyr)
library(readr)
library(ggplot2)
Load Data
Getting the tables into RStudio so that they can be used for evaluation. Each file is picked interactively with file.choose() and read with data.table::fread().
# Ask the user to pick the sample-submission file, read it into a data.table,
# and display the result.
file_name <- file.choose()
sampleSubmission <- data.table::fread(input = file_name)
sampleSubmission
# Ask the user to pick the training-data file, read it into a data.table,
# and display the result.
#
# Why integer64 = "double": with fread's default, some columns of this file
# are read as type 'integer64'. That type needs the bit64 package, which is
# not installed here, so those columns print as strange-looking floating
# point data. Reading them as ordinary doubles removes the dependency on
# bit64; doubles represent whole numbers exactly up to 2^53.
file_name <- file.choose()
TrainData <- data.table::fread(file_name, integer64 = "double")
TrainData
# Ask the user to pick the test-data file, read it into a data.table,
# and display the result.
#
# Why integer64 = "double": with fread's default, some columns of this file
# are read as type 'integer64'. That type needs the bit64 package, which is
# not installed here, so those columns print as strange-looking floating
# point data. Reading them as ordinary doubles removes the dependency on
# bit64; doubles represent whole numbers exactly up to 2^53.
file_name <- file.choose()
TestData <- data.table::fread(file_name, integer64 = "double")
TestData
Data Wrangling
# Give the financial columns of TrainData short names without spaces or
# special characters, so they can be used without backticks. Columns that
# are not listed keep their original names and positions.
CleanedTrain <- rename(
  TrainData,
  RevenueGrowth = `Revenue Growth`,
  CostRevenue = `Cost of Revenue`,
  GrossProfit = `Gross Profit`,
  RDExpenses = `R&D Expenses`,
  SGAExpense = `SG&A Expense`,
  OpExpenses = `Operating Expenses`,
  OpIncome = `Operating Income`,
  InterestExpense = `Interest Expense`,
  IncomeTaxExpense = `Income Tax Expense`,
  NetIncome = `Net Income`,
  ProfitMargin = `Profit Margin`,
  NetProfitMargin = `Net Profit Margin`,
  TotalCurrentAssets = `Total current assets`,
  TotalAssets = `Total assets`,
  TotalDebt = `Total debt`,
  TaxLiability = `Tax Liabilities`,
  # NOTE(review): the `Net Debt` column receives a name that describes a
  # different quantity (and is misspelled). The name is kept because the
  # later column selection refers to it -- confirm the intended source column.
  TotalNonCurrentAssests = `Net Debt`,
  InvestementPurchSales = `Investment purchases and sales`,
  AssetGrowth = `Asset Growth`,
  DebtGrowth = `Debt Growth`
)
CleanedTrain
NA
# Give the financial columns of TestData short names without spaces or
# special characters, so they can be used without backticks. Columns that
# are not listed keep their original names and positions.
CleanedTest <- rename(
  TestData,
  RevenueGrowth = `Revenue Growth`,
  CostRevenue = `Cost of Revenue`,
  GrossProfit = `Gross Profit`,
  RDExpenses = `R&D Expenses`,
  SGAExpense = `SG&A Expense`,
  OpExpenses = `Operating Expenses`,
  OpIncome = `Operating Income`,
  InterestExpense = `Interest Expense`,
  IncomeTaxExpense = `Income Tax Expense`,
  NetIncome = `Net Income`,
  ProfitMargin = `Profit Margin`,
  NetProfitMargin = `Net Profit Margin`,
  TotalCurrentAssets = `Total current assets`,
  TotalAssets = `Total assets`,
  TotalDebt = `Total debt`,
  TaxLiability = `Tax Liabilities`,
  # NOTE(review): the `Net Debt` column receives a name that describes a
  # different quantity (and is misspelled). The name is kept because the
  # later column selection refers to it -- confirm the intended source column.
  TotalNonCurrentAssests = `Net Debt`,
  InvestementPurchSales = `Investment purchases and sales`,
  AssetGrowth = `Asset Growth`,
  DebtGrowth = `Debt Growth`
)
CleanedTest
Keep only the variables selected for analysis
# Reduce CleanedTrain to the columns used in the analysis, in the order
# listed here, and display the result.
# `netProfitMargin` and `NetProfitMargin` are two distinct columns: the
# latter is the renamed `Net Profit Margin`; the former is presumably an
# original column of the data -- TODO confirm that both are wanted.
NewTrain <- select(
  CleanedTrain,
  Name, netProfitMargin, Sector,
  RevenueGrowth, CostRevenue, GrossProfit,
  RDExpenses, SGAExpense, OpExpenses, OpIncome,
  InterestExpense, IncomeTaxExpense, NetIncome,
  ProfitMargin, NetProfitMargin,
  TotalCurrentAssets, TotalAssets, TotalDebt, TaxLiability,
  TotalNonCurrentAssests, InvestementPurchSales,
  AssetGrowth, DebtGrowth
)
NewTrain
# Reduce CleanedTest to the columns used in the analysis, in the order
# listed here, and display the result.
# `netProfitMargin` and `NetProfitMargin` are two distinct columns: the
# latter is the renamed `Net Profit Margin`; the former is presumably an
# original column of the data -- TODO confirm that both are wanted.
NewTest <- select(
  CleanedTest,
  Name, netProfitMargin, Sector,
  RevenueGrowth, CostRevenue, GrossProfit,
  RDExpenses, SGAExpense, OpExpenses, OpIncome,
  InterestExpense, IncomeTaxExpense, NetIncome,
  ProfitMargin, NetProfitMargin,
  TotalCurrentAssets, TotalAssets, TotalDebt, TaxLiability,
  TotalNonCurrentAssests, InvestementPurchSales,
  AssetGrowth, DebtGrowth
)
NewTest
LS0tCnRpdGxlOiAiUHJvamVjdCBQaGFzZSAyIiAKc3VidGl0bGU6ICJEUzMxMCIKYXV0aG9yOiAiQXJ3YSBIYXJhcndhbGEsIEtyaXRoaWthIFNlbnRoaWwsICYgU3dhcmFsaSBLb3JnYW9ua2FyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpJbnN0YWxsaW5nIHRoZSBsaWJyYXJpZXMgdGhhdCBhcmUgbmVlZGVkIHRvIGJlIGFibGUgdG8gbG9hZCBhbmQgcHJlZm9ybSBxdWVyaWVzIG9uIHRoZSBkYXRhc2V0LgpgYGB7cn0KbGlicmFyeShEYXRhQ29tcHV0aW5nKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShydmVzdCkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkocmVhZHIpCmxpYnJhcnkoZ2dwbG90MikKYGBgCgoKIyMjIExvYWQgRGF0YQpHZXR0aW5nIHRoZSB0YWJsZXMgaW50byBSU3R1ZGlvIHRvIGJlIGFibGUgdG8gdXNlIHRoZW0gZm9yIGV2YWx1YXRpb24uIFdlIHdpbGwgZG8gdGhpcyB1c2luZyB0aGUgcmVhZCBmdW5jdGlvbnMuIApgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKc2FtcGxlU3VibWlzc2lvbiA8LSBkYXRhLnRhYmxlOjpmcmVhZChmaWxlX25hbWUpCnNhbXBsZVN1Ym1pc3Npb24KYGBgCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVHJhaW5EYXRhIDwtIGRhdGEudGFibGU6OmZyZWFkKGZpbGVfbmFtZSkKVHJhaW5EYXRhCmBgYAoKCgpgYGB7cn0KZmlsZV9uYW1lIDwtIGZpbGUuY2hvb3NlKCkKVGVzdERhdGEgPC0gZGF0YS50YWJsZTo6ZnJlYWQoZmlsZV9uYW1lKQpUZXN0RGF0YQpgYGAKCgojIyMgRGF0YSBXcmFuZ2xpbmcKCmBgYHtyfQoKQ2xlYW5lZFRyYWluIDwtCiAgVHJhaW5EYXRhICU+JQogIHJlbmFtZShSZXZlbnVlR3Jvd3RoID0gYFJldmVudWUgR3Jvd3RoYCkgJT4lCiAgcmVuYW1lKENvc3RSZXZlbnVlID0gYENvc3Qgb2YgUmV2ZW51ZWApICU+JQogIHJlbmFtZShHcm9zc1Byb2ZpdCA9IGBHcm9zcyBQcm9maXRgKSAlPiUKICByZW5hbWUoUkRFeHBlbnNlcyA9IGBSJkQgRXhwZW5zZXNgKSAlPiUKICByZW5hbWUoU0dBRXhwZW5zZT0gYFNHJkEgRXhwZW5zZWApICU+JQogIHJlbmFtZShPcEV4cGVuc2VzID0gYE9wZXJhdGluZyBFeHBlbnNlc2ApICU+JQogIHJlbmFtZShPcEluY29tZSA9IGBPcGVyYXRpbmcgSW5jb21lYCkgJT4lCiAgcmVuYW1lKEludGVyZXN0RXhwZW5zZSA9IGBJbnRlcmVzdCBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKEluY29tZVRheEV4cGVuc2UgPSBgSW5jb21lIFRheCBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKE5ldEluY29tZT0gYE5ldCBJbmNvbWVgKSAlPiUKICByZW5hbWUoUHJvZml0TWFyZ2luID0gYFByb2ZpdCBNYXJnaW5gKSAlPiUKICByZW5hbWUoTmV0UHJvZml0TWFyZ2luPSBgTmV0IFByb2ZpdCBNYXJnaW5gKSAlPiUKICByZW5hbWUoVG90YWxDdXJyZW50QXNzZXRzID0gYFRvdGFsIGN1cnJlbnQgYXNzZXRzYCkgJT4lCiAgcmVuYW1lKFRvdGFsQXNzZXRzID0gYFRvdGFsIGFzc2V0c2ApICU+JQog
IHJlbmFtZShUb3RhbERlYnQgPSBgVG90YWwgZGVidGApICU+JQogIHJlbmFtZShUYXhMaWFiaWxpdHkgPSBgVGF4IExpYWJpbGl0aWVzYCkgJT4lCiAgcmVuYW1lKFRvdGFsTm9uQ3VycmVudEFzc2VzdHMgPSBgTmV0IERlYnRgKSAlPiUKICByZW5hbWUoSW52ZXN0ZW1lbnRQdXJjaFNhbGVzID0gYEludmVzdG1lbnQgcHVyY2hhc2VzIGFuZCBzYWxlc2ApICU+JQogIHJlbmFtZShBc3NldEdyb3d0aCA9IGBBc3NldCBHcm93dGhgKSAlPiUKICByZW5hbWUoRGVidEdyb3d0aCA9IGBEZWJ0IEdyb3d0aGApIApDbGVhbmVkVHJhaW4KCmBgYAoKYGBge3J9CkNsZWFuZWRUZXN0IDwtCiAgVGVzdERhdGEgJT4lCiAgcmVuYW1lKFJldmVudWVHcm93dGggPSBgUmV2ZW51ZSBHcm93dGhgKSAlPiUKICByZW5hbWUoQ29zdFJldmVudWUgPSBgQ29zdCBvZiBSZXZlbnVlYCkgJT4lCiAgcmVuYW1lKEdyb3NzUHJvZml0ID0gYEdyb3NzIFByb2ZpdGApICU+JQogIHJlbmFtZShSREV4cGVuc2VzID0gYFImRCBFeHBlbnNlc2ApICU+JQogIHJlbmFtZShTR0FFeHBlbnNlPSBgU0cmQSBFeHBlbnNlYCkgJT4lCiAgcmVuYW1lKE9wRXhwZW5zZXMgPSBgT3BlcmF0aW5nIEV4cGVuc2VzYCkgJT4lCiAgcmVuYW1lKE9wSW5jb21lID0gYE9wZXJhdGluZyBJbmNvbWVgKSAlPiUKICByZW5hbWUoSW50ZXJlc3RFeHBlbnNlID0gYEludGVyZXN0IEV4cGVuc2VgKSAlPiUKICByZW5hbWUoSW5jb21lVGF4RXhwZW5zZSA9IGBJbmNvbWUgVGF4IEV4cGVuc2VgKSAlPiUKICByZW5hbWUoTmV0SW5jb21lPSBgTmV0IEluY29tZWApICU+JQogIHJlbmFtZShQcm9maXRNYXJnaW4gPSBgUHJvZml0IE1hcmdpbmApICU+JQogIHJlbmFtZShOZXRQcm9maXRNYXJnaW49IGBOZXQgUHJvZml0IE1hcmdpbmApICU+JQogIHJlbmFtZShUb3RhbEN1cnJlbnRBc3NldHMgPSBgVG90YWwgY3VycmVudCBhc3NldHNgKSAlPiUKICByZW5hbWUoVG90YWxBc3NldHMgPSBgVG90YWwgYXNzZXRzYCkgJT4lCiAgcmVuYW1lKFRvdGFsRGVidCA9IGBUb3RhbCBkZWJ0YCkgJT4lCiAgcmVuYW1lKFRheExpYWJpbGl0eSA9IGBUYXggTGlhYmlsaXRpZXNgKSAlPiUKICByZW5hbWUoVG90YWxOb25DdXJyZW50QXNzZXN0cyA9IGBOZXQgRGVidGApICU+JQogIHJlbmFtZShJbnZlc3RlbWVudFB1cmNoU2FsZXMgPSBgSW52ZXN0bWVudCBwdXJjaGFzZXMgYW5kIHNhbGVzYCkgJT4lCiAgcmVuYW1lKEFzc2V0R3Jvd3RoID0gYEFzc2V0IEdyb3d0aGApICU+JQogIHJlbmFtZShEZWJ0R3Jvd3RoID0gYERlYnQgR3Jvd3RoYCkgCkNsZWFuZWRUZXN0CmBgYAoKIyMjIEZpbHRlciBvdXQgdGhlIGRhdGEgdG8gb25seSBoYXZlIHNlbGVjdCB2YXJpYWJsZXMgZm9yIGFuYWx5c2lzCmBgYHtyfQpOZXdUcmFpbiA8LQogIENsZWFuZWRUcmFpbiAlPiUKICBzZWxlY3QoTmFtZSwgbmV0UHJvZml0TWFyZ2luLCBTZWN0b3IsIFJldmVudWVHcm93dGgsIENvc3RSZXZlbnVlLCBHcm9zc1Byb2ZpdCwgUkRFeHBlbnNlcywgU0dB
RXhwZW5zZSwgT3BFeHBlbnNlcywgT3BJbmNvbWUsIEludGVyZXN0RXhwZW5zZSwgSW5jb21lVGF4RXhwZW5zZSwgTmV0SW5jb21lLCBQcm9maXRNYXJnaW4sIE5ldFByb2ZpdE1hcmdpbiwgVG90YWxDdXJyZW50QXNzZXRzLCBUb3RhbEFzc2V0cywgVG90YWxEZWJ0LCBUYXhMaWFiaWxpdHksIFRvdGFsTm9uQ3VycmVudEFzc2VzdHMsIEludmVzdGVtZW50UHVyY2hTYWxlcywgQXNzZXRHcm93dGgsIERlYnRHcm93dGggKQoKTmV3VHJhaW4KYGBgCgpgYGB7cn0KTmV3VGVzdCA8LQogIENsZWFuZWRUZXN0JT4lCiAgc2VsZWN0KE5hbWUsIG5ldFByb2ZpdE1hcmdpbiwgU2VjdG9yLCBSZXZlbnVlR3Jvd3RoLCBDb3N0UmV2ZW51ZSwgR3Jvc3NQcm9maXQsIFJERXhwZW5zZXMsIFNHQUV4cGVuc2UsIE9wRXhwZW5zZXMsIE9wSW5jb21lLCBJbnRlcmVzdEV4cGVuc2UsIEluY29tZVRheEV4cGVuc2UsIE5ldEluY29tZSwgUHJvZml0TWFyZ2luLCBOZXRQcm9maXRNYXJnaW4sIFRvdGFsQ3VycmVudEFzc2V0cywgVG90YWxBc3NldHMsIFRvdGFsRGVidCwgVGF4TGlhYmlsaXR5LCBUb3RhbE5vbkN1cnJlbnRBc3Nlc3RzLCBJbnZlc3RlbWVudFB1cmNoU2FsZXMsIEFzc2V0R3Jvd3RoLCBEZWJ0R3Jvd3RoICkKCk5ld1Rlc3QKYGBgCgo=